rm(list = ls())

library(tidyverse)
library(lubridate)
library(here)
library(data.table)
library(qs)

# Load data ---------------------------------------------------------------

parliament <- qread(here("data", "ticino_parliament", "ticino_clean.qs"))

cb <- qread(here("data", "cb.qs"))
cb <- cb %>% select(bfs19, BR, distmin, distkm, travelMUNmin, travelMUNkm, travelEXACTmin, travelEXACTkm, canton_name)
cb <- cb %>% group_by(bfs19) %>% filter(row_number() == 1)

munnames <- read_csv(here("data", "cleaning", "MuniCodes.csv"))
munnames <- munnames %>% select(munnr2012, munname2012)
munnames <- munnames %>% distinct()

crosswalk <- read_csv(here("data", "cw_munnr12_bfs19.csv"))
crosswalk <- crosswalk %>% left_join(munnames)
crosswalk <- crosswalk %>% left_join(cb)


# Fix names of municipalities ---------------------------------------------

#Municipalities that exist in parliament but not in crosswalk

crosswalk_munname <- crosswalk$munname2012

parliament %>% filter(Domicilio %in% crosswalk_munname)

parliament %>% filter(!Domicilio %in% crosswalk_munname) %>% pull(Domicilio)

parliament$Domicilio[which(!parliament$Domicilio %in% crosswalk_munname)] %>% unique %>% sort

parliament$year[which(!parliament$Domicilio %in% crosswalk_munname)] %>% unique

parliament <- parliament %>% mutate(Domicilio = recode(Domicilio, "Collina dOro" = "Collina d'Oro"))

parliament <- parliament %>% mutate(Domicilio = recode(Domicilio, "Ronco s/Ascona" = "Ronco sopra Ascona"))

parliament <- parliament %>% mutate(Domicilio = recode(Domicilio, "S. Antonino" = "Sant'Antonino",
                                                      "Sant' Antonino" = "Sant'Antonino"))


#The following municpalities were merged in 2005 to form Blenio: https://en.wikipedia.org/wiki/Campo_Blenio
parliament <- parliament %>% mutate(Domicilio = recode(Domicilio, "Campo (Blenio)" = "Blenio", "Campo Blenio" = "Blenio", "Olivone" = "Blenio"))

#The following municipalities were merged in 2005 to form Alto Malcantone: https://en.wikipedia.org/wiki/Arosio,_Switzerland 
parliament <- parliament %>% mutate(Domicilio = recode(Domicilio, "Arosio" = "Alto Malcantone"))

#The following municipalities were merged in 2009 into Mendrisio: https://en.wikipedia.org/wiki/Arzo
parliament <- parliament %>% mutate(Domicilio = recode(Domicilio, "Arzo" = "Mendrisio", "Genestrerio" = "Mendrisio"))

#The following municipalities was merged in 2001 into Mendrisio: https://it.wikipedia.org/wiki/Salorino_(Mendrisio)
parliament <- parliament %>% mutate(Domicilio = recode(Domicilio, "Salorino" = "Mendrisio"))

#The following municipalities were merged in 2008 into Capriasca: https://it.wikipedia.org/wiki/Bidogno
parliament <- parliament %>% mutate(Domicilio = recode(Domicilio, "Bidogno" = "Capriasca"))

#The following municipalities were merged in 2001 to form Capriasca: https://en.wikipedia.org/wiki/Sala_Capriasca
parliament <- parliament %>% mutate(Domicilio = recode(Domicilio, "Sala Capriasca" = "Capriasca",
                                                      "Tesserete" = "Capriasca"))

#The following merged in 2004 into Lugano: https://en.wikipedia.org/wiki/Breganzona
parliament <- parliament %>% mutate(Domicilio = recode(Domicilio, "Breganzona" = "Lugano"))

#The following merged in 2004 into Acquarossa: https://en.wikipedia.org/wiki/Corzoneso
parliament <- parliament %>% mutate(Domicilio = recode(Domicilio, "Corzoneso" = "Acquarossa"))

#The following merged in 2004 into Collina d'Oro: https://en.wikipedia.org/wiki/Gentilino
parliament <- parliament %>% mutate(Domicilio = recode(Domicilio, "Gentilino" = "Collina d'Oro", "Montagnola" = "Collina d'Oro"))

#The following merged in 2009 into Cugnasco-Gerra: https://en.wikipedia.org/wiki/Gerra_(Verzasca)
parliament <- parliament %>% mutate(Domicilio = recode(Domicilio, "Gerra (Verzasca)" = "Cugnasco-Gerra", "Gerra Verzasca" = "Cugnasco-Gerra"))

#The following merged in 2009 into Centovalli: https://en.wikipedia.org/wiki/Intragna
parliament <- parliament %>% mutate(Domicilio = recode(Domicilio, "Intragna" = "Centovalli"))

#The following merged in 2004 into Lugano: https://en.wikipedia.org/wiki/Pambio_Noranco
parliament <- parliament %>% mutate(Domicilio = recode(Domicilio, "Pambio-Noranco" = "Lugano"))

#The following merged in 2004 into Lugano: https://en.wikipedia.org/wiki/Pazzallo
parliament <- parliament %>% mutate(Domicilio = recode(Domicilio, "Pazzallo" = "Lugano"))

#The following merged in 2004 into Lugano: https://en.wikipedia.org/wiki/Pregassona
parliament <- parliament %>% mutate(Domicilio = recode(Domicilio, "Pregassona" = "Lugano"))

#The following merged in 2004 into Lugano: https://en.wikipedia.org/wiki/Rivera,_Switzerland
parliament <- parliament %>% mutate(Domicilio = recode(Domicilio, "Rivera" = "Monteceneri"))

#The following merged in 2010 into Gambarogno: https://en.wikipedia.org/wiki/Sant%27Abbondio
parliament <- parliament %>% mutate(Domicilio = recode(Domicilio, "S. Abbondio" = "Gambarogno"))

#The following merged in 2010 into Gambarogno: https://en.wikipedia.org/wiki/Sant%27Abbondio
parliament <- parliament %>% mutate(Domicilio = recode(Domicilio, "S. Abbondio" = "Gambarogno"))

#The following merged in 2004 into Lugano: https://en.wikipedia.org/wiki/Viganello
parliament <- parliament %>% mutate(Domicilio = recode(Domicilio, "Viganello" = "Lugano"))

gc()



#The following municipalities were formed after the 2012
#bfs19 codes: Riviera: 5287
#bfs19 codes: Serravalle: 5050
#bfs19 codes: Terre di Pedemonte: 5396
##All the former municipalities that constituted the same municipalities have same border region/treatment group so will assign each to one of original municipalities
parliament$Domicilio[which(!parliament$Domicilio %in% crosswalk$munname2012)] %>% unique %>% sort

parliament <- parliament %>% mutate(Domicilio = recode(Domicilio, "Riviera" = "Cresciano"))

parliament <- parliament %>% mutate(Domicilio = recode(Domicilio, "Serravalle" = "Ludiano"))

parliament <- parliament %>% mutate(Domicilio = recode(Domicilio, "Terre di Pedemonte" = "Cavigliano"))


crosswalk <- crosswalk %>% select(bfs19, munname2012, BR, distmin, distkm, travelMUNmin, travelMUNkm, travelEXACTmin, travelEXACTkm)
crosswalk <- crosswalk %>% group_by(munname2012) %>% filter(row_number() == 1)


#merge
parliament <- parliament %>% left_join(crosswalk, by = c("Domicilio" = "munname2012"))

gc()


# Migration related terms -------------------------------------------------
parliament$title <- parliament$title %>% tolower()

imm_words <- c("migrant", "immigrat", "frontalier", "frontier", "Schengen", "cittadin", "migrator", "immigrazion", "rimpatri",
          "estero", "stranier") %>% paste0(collapse = "|")

parliament$immigration <- str_detect(parliament$title, imm_words)

titles <- parliament %>% 
  filter(author_final != "") %>% 
  filter(year != "2022") %>% 
  filter(immigration == 1) %>% 
  group_by(id) %>% 
  slice_head(n = 1) %>% 
  select(title, date, year, author_original) 

titles$title <- titles$title %>% str_remove_all("\\n+|\\t+")

 
#Then, a research assistant handcoded each title to examine whether it's related to immigration or not, which we load here
handcoded <- read.csv(here("data", "ticino_parliament", "ticino_handcoded.csv"), stringsAsFactors = F)

parliament <- parliament %>% left_join(handcoded %>% rename(immigration_handcoded = immigration..0..1.) %>% select(id, immigration_handcoded))

parliament$immigration_handcoded[is.na(parliament$immigration_handcoded)] <- 0

parliament$immigration_handcoded <- parliament$immigration_handcoded %>% as.numeric %>% as.logical()

parliament$border <- ifelse(parliament$travelMUNmin <= 15 & parliament$BR == 1, 1, 0)
parliament$border %>% table


# Fix data shape ----------------------------------------------------------
#Goal is to have the following data structure: Each resolution has two rows: border = 0 and border = 1. 
#If any border candidates sponsored the resolution AND the resolution addresses migration, migration column will be coded as 1 for the border row. Same with the non-border row.

#1- Filter to resolutions that had a defined border region: This removes those with no authors and those with unidentified authors

parliament <- parliament %>% filter(travelMUNmin <= 30) %>% filter(BR == 1)
parliament <- parliament %>% filter(!is.na(border))

#2- Extract to resolutions that are all supported by the same region (2 regions: border and non-border). Note this includes resolutions that had one sponsor
parliament_same <- parliament %>% 
  group_by(id) %>% 
  mutate(distinct_border = n_distinct(border)) %>%  #Find number of distinct border values within each group
  filter(distinct_border == 1) %>% #Filter to cases where there's only one border value
  group_by(id, border) %>% 
  slice_head() #Take the first row in each id-border group

#Create the same data frame above, but add the other border region to it. Then add a migration column = FALSE to it (because people from the other border region did not vote on these resolutions)
parliament_same2 <- parliament_same %>% mutate(border = 1-border,
                                              immigration = FALSE, 
                                              immigration_handcoded = FALSE)

#Check: Those border regions in same should same as non-border regions in same2
parliament_same$border %>% table
parliament_same2$border %>% table

#Check: Immigration should be false in same2
parliament_same$immigration %>% table %>% sum
parliament_same2$immigration %>% table

parliament_same$immigration_handcoded %>% table %>% sum
parliament_same2$immigration_handcoded %>% table

#Bind the two data frames
parliament_same_final <- parliament_same %>% bind_rows(parliament_same2)

#Check: Each id should have 2 rows
parliament_same_final %>% group_by(id) %>% mutate(n = n()) %>% pull(n) %>% table

#Check: Border and non-border regions should be the same length
parliament_same_final$border %>% table

#3- Create parliament that does not include the resolutions with distinct border region
parliament_different <- parliament %>% 
  filter(!id %in% parliament_same_final$id)

#Check distinct border regions: Should be 2 for each resolution
parliament_different %>% 
  group_by(id) %>% 
  mutate(distinct_border = n_distinct(border)) %>% 
  pull(distinct_border) %>% table

#Check: migration status should be the same for each resolution here
parliament_different %>% 
  group_by(id) %>% 
  mutate(distinct_migration = n_distinct(immigration)) %>% 
  pull(distinct_migration) %>% 
  table

parliament_different %>% 
  group_by(id) %>% 
  mutate(n = n()) %>% 
  pull(n) %>% 
  summary

#4- Group this data frame by ID, border and select first row
parliament_different <- parliament_different %>%
  group_by(id, border) %>% 
  slice_head()

#Check: Each resolution should now have 2 rows
parliament_different %>% 
  group_by(id) %>% 
  mutate(n = n()) %>% 
  pull(n) %>% 
  summary

#5- Merge the two data frames and remove incorrect columns (because we sliced to first row)
parliament_final <- parliament_same_final %>% bind_rows(parliament_different)

parliament_final %>% filter(immigration_handcoded == T) %>% pull(id) %>% unique %>% length
parliament %>% filter(immigration_handcoded == T) %>% pull(id) %>% unique %>% length

## Add columns for transition/open borders ---------------------------------

#Remove unnecessary columns
parliament_final <- parliament_final %>% select(-c(author_final, author, Cognome, Nome, Lista, Domicilio, adist, bfs19, BR, distmin, distkm, 
                                                  travelMUNmin, travelMUNkm, travelEXACTmin, travelEXACTkm, distinct_border))

#Fix ID variable after all the bills that have been removed
parliament_final <- parliament_final %>% arrange(date) %>% group_by(year, id) %>% mutate(id2 = paste0(year, "-", cur_group_id())) %>% ungroup() %>% select(-id) %>% rename(id = id2)

parliament_final$immigration <- parliament_final$immigration %>% as.numeric
parliament_final$immigration_handcoded <- parliament_final$immigration_handcoded %>% as.numeric

parliament_final <- parliament_final %>% 
  mutate(transition = ifelse(year %in% 2000:2003, 1, 0),
         free = ifelse(year > 2003, 1, 0),
         transition_border = transition * border,
         free_border = free * border
  )

for(i in 1987:2022){
  parliament_final[parliament_final$year == i, paste0("border_", i)] <- parliament_final$border[which(parliament_final$year == i)]
}

#Replace NAs with 0s for the new columns
parliament_final <- parliament_final %>% 
  mutate_at(vars(matches("^border_\\d")), ~replace(., is.na(.), 0))

parliament_final %>% qsave(here("data", "ticino_parliament_main.qs"))

# Alternative Data Shape:  ---------------------------------
#Make a dataset for resolutions sponsored by border MPs and another dataset for resolutions sponsored by non-border MPs. There can be intersection
#Effects here will be relative to resolutions at the region level (for two regions: border adjacent and non-border adjacent/control)

#For this dataset, we will also report results by sponsorship far-right parties (in addition to regular results)

fr_lista <- c("LEGA", "Lega dei ticinesi", "LEGA DEI TICINESI", "UDC", "Unione democratica di centro",
             "UDC - UDF", "La Destra (UDC-UDF-AL)")
  

parliament$fr_parties <- 0
parliament$fr_parties[which(parliament$Lista %in% fr_lista)] <- 1

parliament_border_fr <- parliament %>% filter(border == 1, fr_parties == 1)
parliament_border_nofr <- parliament %>% filter(border == 1, fr_parties == 0)
parliament_noborder_fr <- parliament %>% filter(border == 0, fr_parties == 1)
parliament_noborder_nofr <- parliament %>% filter(border == 0, fr_parties == 0)

parliament_border_fr <- parliament_border_fr %>% group_by(id) %>% mutate(n_authors = n()) %>% slice(1)
parliament_border_nofr <- parliament_border_nofr %>% group_by(id) %>% mutate(n_authors = n()) %>% slice(1)
parliament_noborder_fr <- parliament_noborder_fr %>% group_by(id) %>% mutate(n_authors = n()) %>% slice(1)
parliament_noborder_nofr <- parliament_noborder_nofr %>% group_by(id) %>% mutate(n_authors = n()) %>% slice(1)

parliament_final <- parliament_border_fr %>% bind_rows(parliament_border_nofr) %>%
  bind_rows(parliament_noborder_fr) %>% bind_rows(parliament_noborder_nofr)

parliament_final %>% 
  group_by(id) %>% 
  summarise(n = n()) %>% 
  pull(n) %>% 
  table

## Add columns for transition/open borders ---------------------------------

#Remove unnecessary columns
parliament_final <- parliament_final %>% select(-c(author_final, author, Cognome, Nome, Lista, Domicilio, adist, bfs19, BR, distmin, distkm, 
                                                  travelMUNmin, travelMUNkm, travelEXACTmin, travelEXACTkm))

#Fix ID variable after all those that have been removed
parliament_final <- parliament_final %>% arrange(date) %>% group_by(year, id) %>% mutate(id2 = paste0(year, "-", cur_group_id())) %>% ungroup() %>% select(-id) %>% rename(id = id2)

parliament_final$immigration <- parliament_final$immigration %>% as.numeric
parliament_final$immigration_handcoded <- parliament_final$immigration_handcoded %>% as.numeric

parliament_final <- parliament_final %>% 
  mutate(transition = ifelse(year %in% 2000:2003, 1, 0),
         free = ifelse(year > 2003, 1, 0),
         transition_border = transition * border,
         free_border = free * border)

for(i in 1987:2022){
  parliament_final[parliament_final$year == i, paste0("border_", i)] <- parliament_final$border[which(parliament_final$year == i)]
}

#Replace NAs with 0s for the new columns
parliament_final <- parliament_final %>% 
  mutate_at(vars(matches("^border_\\d")), ~replace(., is.na(.), 0))

parliament_final <- parliament_final %>% group_by(id, border) %>% slice(1) %>% ungroup

parliament_final %>% qsave(here("data", "ticino_parliament_byregion.qs"))

parliament_final %>% group_by(id) %>% summarise(imm = max(immigration_handcoded)) %>% pull(imm) %>% sum

parliament_final %>% group_by(id) %>% mutate(distinct_migration = n_distinct(immigration)) %>% pull(distinct_migration) %>% table

parliament %>% 
  filter(author_final != "") %>% 
  group_by(id) %>% 
  slice_tail() %>% 
  mutate(immigration = FALSE) %>% 
  bind_rows(parliament)




